load data

geno <- read_delim("tables/TableS1_genotypes.tsv")
pheno <- read_delim("tables/TableS2_phenotypes_metadata.tsv")
cipro_antibiogram <- full_join(geno,pheno) %>%
  mutate(SRnwt = if_else(Resistance.phenotype=="I", "NWT", Resistance.phenotype)) %>%
  mutate(SRnwt = factor(SRnwt, levels=c("S","NWT","R")))

set colours

wt_colours <- c(`non-wt (I/R)`="IndianRed", `wt (S)`= "LightBlue")
res_colours <- c("I"="grey", "R"="IndianRed", "S"="LightBlue", "NWT"="grey")

long form, one row per strain/determinant combination

cipro_res_mech <- cipro_antibiogram %>%
  mutate(Aac6_gene=if_else(aac6 == 1, "aac(6')-Ib-cr", "-")) %>%
  pivot_longer(c(Flq_mutations,Flq_acquired, Aac6_gene), names_to = "marker.type", 
               values_to = "marker") %>%
  separate_longer_delim(cols=marker, delim=";")

Numbers reported in text - para 3

# effect of ParC when added to GyrA-83 background (absence of any genes or GyrA-87)

# MIC data
QRDR_MIC_GyrA83background_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & acquired_genes==0 & `GyrA-83`==1 & `GyrA-87`==0) %>% 
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, ParC_mutations, `ParC-80`, `ParC-84`)

wilcox.test(log2(as.numeric(Measurement)) ~ ParC_mutations, data=QRDR_MIC_GyrA83background_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by ParC_mutations
## W = 13112, p-value = 0.001161
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ ParC_mutations, data=QRDR_MIC_GyrA83background_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ ParC_mutations, 
##     data = QRDR_MIC_GyrA83background_noGenes_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7069 -0.0915 -0.0915  0.2931  7.9085 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      0.7069     0.1404   5.033 6.26e-07 ***
## ParC_mutations   0.3846     0.1472   2.613  0.00918 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.07 on 646 degrees of freedom
## Multiple R-squared:  0.01046,    Adjusted R-squared:  0.008928 
## F-statistic: 6.829 on 1 and 646 DF,  p-value: 0.00918
summary(as.numeric(QRDR_MIC_GyrA83background_noGenes_dat$Measurement)[QRDR_MIC_GyrA83background_noGenes_dat$`ParC_mutations`==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.250   2.000   2.000   4.692   2.000 512.000
summary(as.numeric(QRDR_MIC_GyrA83background_noGenes_dat$Measurement)[QRDR_MIC_GyrA83background_noGenes_dat$`ParC_mutations`==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   1.000   1.000   2.894   4.000  16.000
# disk diffusion
QRDR_DD_GyrA83background_noGenes_dat <- cipro_antibiogram %>%
  filter(aac6==0 & acquired_genes==0 & `GyrA-83`==1 & `GyrA-87`==0) %>% 
  filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, ParC_mutations, `ParC-80`, `ParC-84`)

wilcox.test(as.numeric(Measurement) ~ ParC_mutations, data=QRDR_DD_GyrA83background_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by ParC_mutations
## W = 1112.5, p-value = 5.475e-09
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ ParC_mutations, data=QRDR_DD_GyrA83background_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ ParC_mutations, data = QRDR_DD_GyrA83background_noGenes_dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -7.870 -3.585 -1.727  3.202 20.415 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      18.870      1.006   18.76  < 2e-16 ***
## ParC_mutations   -9.285      1.204   -7.71 4.58e-11 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.823 on 74 degrees of freedom
## Multiple R-squared:  0.4454, Adjusted R-squared:  0.4379 
## F-statistic: 59.44 on 1 and 74 DF,  p-value: 4.582e-11
summary(as.numeric(QRDR_DD_GyrA83background_noGenes_dat$Measurement)[QRDR_DD_GyrA83background_noGenes_dat$ParC_mutations==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.000   6.000   6.000   9.585  13.000  30.000
summary(as.numeric(QRDR_DD_GyrA83background_noGenes_dat$Measurement)[QRDR_DD_GyrA83background_noGenes_dat$ParC_mutations==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.00   16.00   20.00   18.87   22.00   25.00
# test categorial phenotypes

# all genomes with no acquired genes (MIC/DD)
QRDR_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & acquired_genes==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, QRDR_mutations) %>%
  mutate(qrdr_bin=if_else(QRDR_mutations==0, 0, 1))

# nonWT vs presence/absence
fisher.test(table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   434.3185 1006.3628
## sample estimates:
## odds ratio 
##   659.1292
# resistance vs presence/absence
fisher.test(table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   866.9346 1947.9686
## sample estimates:
## odds ratio 
##   1255.155

Numbers reported in text - para 4

effect of presence/absence of any QRDR, in absence of any acquired genes

# total strains without PMQR or aac6
nrow(QRDR_noGenes_dat)
## [1] 7452
# presence of QRDR vs MIC
QRDR_MIC_noGenes_dat <- cipro_antibiogram %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
    filter(aac6==0 & acquired_genes==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, QRDR_mutations, `GyrA-83`, `ParC-80`) %>%
  mutate(qrdr_bin=if_else(QRDR_mutations==0, 0, 1))

wilcox.test(log2(as.numeric(Measurement)) ~ qrdr_bin, data=QRDR_MIC_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by qrdr_bin
## W = 111224, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ qrdr_bin, data=QRDR_MIC_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ qrdr_bin, data = QRDR_MIC_noGenes_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.1749 -0.1749 -0.0585 -0.0585  7.9415 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.94146    0.01660  -116.9   <2e-16 ***
## qrdr_bin     3.11640    0.02834   110.0   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9333 on 4809 degrees of freedom
## Multiple R-squared:  0.7155, Adjusted R-squared:  0.7155 
## F-statistic: 1.21e+04 on 1 and 4809 DF,  p-value: < 2.2e-16
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$qrdr_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   2.000   2.000   5.539   2.000 512.000
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$qrdr_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3471  0.2500 64.0000
# presence of QRDR vs DD zone
QRDR_DD_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & acquired_genes==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, QRDR_mutations) %>%
  mutate(qrdr_bin=if_else(QRDR_mutations==0, 0, 1))

wilcox.test(as.numeric(Measurement) ~ qrdr_bin, data=QRDR_DD_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by qrdr_bin
## W = 320646, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ qrdr_bin, data=QRDR_DD_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ qrdr_bin, data = QRDR_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6659  -1.6659   0.3341   1.3341  18.1154 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  28.66587    0.06349  451.51   <2e-16 ***
## qrdr_bin    -16.78125    0.28616  -58.64   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.181 on 2639 degrees of freedom
## Multiple R-squared:  0.5658, Adjusted R-squared:  0.5656 
## F-statistic:  3439 on 1 and 2639 DF,  p-value: < 2.2e-16
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$qrdr_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00    6.00    9.00   11.88   17.00   30.00
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$qrdr_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
# presence of QRDR vs nonWT
fisher.test(table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$nonWT_binary, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   434.3185 1006.3628
## sample estimates:
## odds ratio 
##   659.1292
# presence of QRDR vs R
fisher.test(table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(QRDR_noGenes_dat$resistant, QRDR_noGenes_dat$qrdr_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   866.9346 1947.9686
## sample estimates:
## odds ratio 
##   1255.155

effect of number of QRDR mutations, in absence of any acquired genes

# MIC vs QRDR count
QRDR_MIC_noGenes_dat <- QRDR_MIC_noGenes_dat %>%
  mutate(QRDR_0_1_2 = if_else(QRDR_mutations>2, 2, QRDR_mutations)) %>%
  mutate(QRDR_0_1_2_3 = if_else(QRDR_mutations>3, 3, QRDR_mutations)) %>%
  mutate(QRDR_1_2 = if_else(QRDR_0_1_2==0, NA, QRDR_0_1_2)) %>%
  mutate(QRDR_2_3 = if_else(QRDR_0_1_2<2, NA, QRDR_0_1_2_3))

# median MICs, grouped by QRDR count
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$QRDR_mutations==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3471  0.2500 64.0000
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$QRDR_mutations==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   1.000   1.000   2.592   2.000  16.000
summary(as.numeric(QRDR_MIC_noGenes_dat$Measurement)[QRDR_MIC_noGenes_dat$QRDR_mutations>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   2.000   2.000   6.258   2.000 512.000
# test for difference in MIC with QRDR count
summary(lm(log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), data=QRDR_MIC_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), 
##     data = QRDR_MIC_noGenes_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.2032 -0.2032 -0.0585 -0.0585  7.9415 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         -1.94146    0.01655 -117.31   <2e-16 ***
## factor(QRDR_0_1_2)1  2.49701    0.11086   22.52   <2e-16 ***
## factor(QRDR_0_1_2)2  3.14462    0.02866  109.72   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9301 on 4808 degrees of freedom
## Multiple R-squared:  0.7175, Adjusted R-squared:  0.7174 
## F-statistic:  6105 on 2 and 4808 DF,  p-value: < 2.2e-16
summary(lm(log2(as.numeric(Measurement)) ~ QRDR_mutations, data=QRDR_MIC_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ QRDR_mutations, 
##     data = QRDR_MIC_noGenes_dat)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -4.568 -0.120 -0.120 -0.120  8.581 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -1.88003    0.01725  -109.0   <2e-16 ***
## QRDR_mutations  1.14940    0.01123   102.3   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9817 on 4809 degrees of freedom
## Multiple R-squared:  0.6852, Adjusted R-squared:  0.6852 
## F-statistic: 1.047e+04 on 1 and 4809 DF,  p-value: < 2.2e-16
# DD vs QRDR count
QRDR_DD_noGenes_dat <- QRDR_DD_noGenes_dat %>%
  mutate(QRDR_0_1_2 = if_else(QRDR_mutations>2, 2, QRDR_mutations)) %>%
  mutate(QRDR_0_1_2_3 = if_else(QRDR_mutations>3, 3, QRDR_mutations)) %>%
  mutate(QRDR_1_2 = if_else(QRDR_0_1_2==0, NA, QRDR_0_1_2)) %>%
  mutate(QRDR_2_3 = if_else(QRDR_0_1_2<2, NA, QRDR_0_1_2_3))

# median DD zones, grouped by QRDR count
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$QRDR_mutations==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$QRDR_mutations==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   11.00   17.00   20.50   19.75   22.25   29.00
summary(as.numeric(QRDR_DD_noGenes_dat$Measurement)[QRDR_DD_noGenes_dat$QRDR_mutations>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   6.000   6.000   6.000   6.676   6.000  12.000
# test for difference in DD zone with QRDR count
summary(lm(log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), data=QRDR_DD_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ factor(QRDR_0_1_2), 
##     data = QRDR_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24805 -0.07812  0.02497  0.07388  1.96617 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          4.833011   0.003863 1251.15   <2e-16 ***
## factor(QRDR_0_1_2)1 -0.564658   0.030848  -18.30   <2e-16 ***
## factor(QRDR_0_1_2)2 -1.892292   0.020766  -91.12   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1936 on 2638 degrees of freedom
## Multiple R-squared:  0.7645, Adjusted R-squared:  0.7644 
## F-statistic:  4283 on 2 and 2638 DF,  p-value: < 2.2e-16
summary(lm(log2(as.numeric(Measurement)) ~ QRDR_mutations, data=QRDR_DD_noGenes_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ QRDR_mutations, 
##     data = QRDR_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.24831 -0.07839  0.02471  0.07362  1.57431 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     4.833275   0.003884 1244.46   <2e-16 ***
## QRDR_mutations -0.750349   0.008203  -91.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1953 on 2639 degrees of freedom
## Multiple R-squared:  0.7602, Adjusted R-squared:  0.7601 
## F-statistic:  8367 on 1 and 2639 DF,  p-value: < 2.2e-16

Fig1 - number of QRDR vs MIC, facet aac6 yes/no (no other genes)

#MIC distribution for # QRDR, in absence of genes
QRDR_MIC_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
    geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
    facet_wrap(vars(aac6_label)) +
    scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
    scale_color_manual(values = res_colours) + 
    theme_light() + 
    labs(y="MIC (mg/L)", x="", col="Phenotype", 
         title="a) No. QRDR mutations vs phenotype",
         subtitle="(in absence of PMQR genes)")

QRDR_MIC_noGenes

QRDR_pheno_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
    scale_fill_manual(values = res_colours) + 
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of QRDR mutations") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())

Fig S5 - number of QRDR vs DD, facet aac6 yes/no (no other genes)

# DD distribution for # QRDR, in absence of genes
QRDR_DD_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
    geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
    facet_wrap(vars(aac6_label)) +
    scale_colour_manual(values = res_colours) + 
    theme_light() + 
    labs(y="Disk zone (mm)", x="", col="Phenotype",
         title="No. QRDR mutations vs phenotype",
         subtitle="(in absence of PMQR genes)")

QRDR_DDpheno_noGenes <- cipro_antibiogram %>%
    filter(acquired_genes==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    mutate(QRDR_mutations=if_else(QRDR_mutations==4, 3, QRDR_mutations)) %>% # single isolate with 4 QRDR
    ggplot(aes(x=factor(QRDR_mutations), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
    scale_fill_manual(values = res_colours) +  
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of QRDR mutations") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())
QRDR_DD_noGenes / QRDR_DDpheno_noGenes + plot_layout(heights=c(2,1))

ggsave("figs/FigS5_numQRDR_DD_pheno.pdf", width=6, height=4)
ggsave("figs/FigS5_numQRDR_DD_pheno.png", width=6, height=4)

Fig S3 panel a - upset plot of QRDR positions, absence of aac6/acquired genes, with MIC data

upset_QRDR_noAac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==0 & acquired_genes==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=c("GyrA-83","GyrA-87", "ParC-80","ParC-84","wt_QRDR"), # counts are converted to binary
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing WT/nonWT
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
                 scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_colour_manual(values = res_colours) + 
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^9)) +
              theme(legend.position = "none", axis.title=element_text(size=8))  +
              ggtitle("aac(6')-Ib-cr absent")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

Fig S3 panel b - upset plot of QRDR positions, presence of aac6 but no other acquired genes, with MIC data

upset_QRDR_aac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==1 & acquired_genes==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=c("GyrA-83","GyrA-87", "ParC-80","ParC-84","wt_QRDR"), # counts are converted to binary
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing WT/nonWT
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
              scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_colour_manual(values = res_colours) + 
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^9)) +
              labs(colour = "Phenotype") +
              theme(axis.title=element_text(size=8)) +
              ggtitle("aac(6')-Ib-cr present")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

Fig S4 panel a - QRDR positions, absence of aac6/acquired genes, with DD data

upset_QRDR_noAac6_DD <- cipro_antibiogram %>% 
  filter(aac6==0 & acquired_genes==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=c("GyrA-83","GyrA-87", "ParC-80","ParC-84","wt_QRDR"), # counts are converted to binary
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_colour_manual(values = res_colours) + 
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("Disk diffusion data") +
              theme(axis.title=element_text(size=8), legend.position="none") + 
              ggtitle("aac(6')-Ib-cr absent") + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_QRDR_noAac6_DD

Fig S4 panel b - QRDR positions, presence of aac6 but no other acquired genes, with DD data

upset_QRDR_aac6_DD <- cipro_antibiogram %>% 
  filter(aac6==1 & acquired_genes==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=c("GyrA-83","GyrA-87", "ParC-80","ParC-84","wt_QRDR"), # counts are converted to binary
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
               scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_color_manual(values = c("I"="grey", "R"="IndianRed", "S"="LightBlue")) + 
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("aac(6')-Ib-cr present") +
              theme(axis.title=element_text(size=8)) + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_QRDR_aac6_DD

Supp Figs 3 and 4: QRDR sites vs pheno (MIC & DD, with/without aac6)

upset_QRDR_noAac6_MIC | upset_QRDR_aac6_MIC

ggsave("figs/FigS3_QRDR_MIC.pdf", width=9, height=6)
ggsave("figs/FigS3_QRDR_MIC.png", width=9, height=6)


upset_QRDR_noAac6_DD | upset_QRDR_aac6_DD

ggsave("figs/FigS4_QRDR_DD.pdf", width=9, height=6)
ggsave("figs/FigS4_QRDR_DD.png", width=9, height=6)

set up binary variables for common PMQR genes

# genes in at least 10 genomes
qnr_genes <- cipro_res_mech %>% 
  mutate(marker=gsub("[\\^\\*\\?]", "", marker)) %>%
  mutate(marker=gsub(".v1", "", marker)) %>%
  mutate(marker=gsub(".v2", "", marker)) %>%
  group_by(marker) %>% 
  count() %>% filter(startsWith(marker, "q")) %>% 
  filter(n>10) %>% pull(marker)

qnr_presence <- cipro_res_mech %>% 
  mutate(marker=gsub("[\\^\\*\\?]", "", marker)) %>%
  mutate(marker=gsub(".v1", "", marker)) %>%
  mutate(marker=gsub(".v2", "", marker)) %>%
  filter(marker %in% qnr_genes) %>%
  group_by(strain, marker) %>% count() %>% pivot_wider(names_from=marker, values_from=n)

cipro_antibiogram <- left_join(cipro_antibiogram, qnr_presence)

Fig S6 panel a - upset plot of qnr genes, absence of aac6 and QRDR, with MIC data

upset_qnr_noAac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==0 & QRDR_mutations==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=qnr_genes, 
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
              scale_color_manual(values = res_colours)+
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:6), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^6)) +
              theme(legend.position = "none", axis.title=element_text(size=8))  +
              ggtitle("aac(6')-Ib-cr absent")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

Fig S6 panel- upset plot of qnr genes, presence of aac6 but no QRDR, with MIC data

upset_qnr_aac6_MIC <- cipro_antibiogram %>% 
  filter(aac6==1 & QRDR_mutations==0) %>%
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=qnr_genes, 
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45),axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
            theme(legend.position = "none", axis.title=element_text(size=8)),
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
                scale_color_manual(values = res_colours)+
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:6), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^6)) +
              labs(colour = "Phenotype") +
              theme(axis.title=element_text(size=8)) +
              ggtitle("aac(6')-Ib-cr present")
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

Fig S7 panel - qnr genes, absence of aac6 and QRDR mutations, with DD data

upset_qnr_noAac6_DD <- cipro_antibiogram %>% 
  filter(aac6==0 & QRDR_mutations==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=qnr_genes, # counts are converted to binary
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
               scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
                scale_color_manual(values = res_colours)+
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("Disk diffusion data") +
              theme(axis.title=element_text(size=8), legend.position="none") + 
              ggtitle("aac(6')-Ib-cr absent") + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_qnr_noAac6_DD

Fig S7 panel - qnr genes, presence of aac6 but no QRDR mutations, with DD data

upset_qnr_aac6_DD <- cipro_antibiogram %>% 
  filter(aac6==1 & QRDR_mutations==0) %>%
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=qnr_genes, # counts are converted to binary
      set_sizes=(upset_set_size() +
                    theme(axis.text.x=element_text(angle=45), axis.ticks.x=element_line())),
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
               scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
                scale_color_manual(values = res_colours)+
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("aac(6')-Ib-cr present") +
              theme(axis.title=element_text(size=8)) + ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_qnr_aac6_DD

Supp Figs S6 and S7: qnr genes vs pheno (MIC & DD, with/without aac6)

upset_qnr_noAac6_MIC | upset_qnr_aac6_MIC

ggsave("figs/FigS6_qnr_MIC.pdf", width=12, height=6)
ggsave("figs/FigS6_qnr_MIC.png", width=12, height=6)


upset_qnr_noAac6_DD | upset_qnr_aac6_DD

ggsave("figs/FigS7_qnr_DD.pdf", width=12, height=6)
ggsave("figs/FigS7_qnr_DD.png", width=12, height=6)

numbers for text - paragraph 5

# effect of qnr/qep genes, in absence of QRDR and aac6

# MIC data in absence of QRDR and aac6
qnr_MIC_nullBG_dat <- cipro_antibiogram %>%
  filter(Laboratory.Typing.Method !="Disk diffusion") %>%
  filter(aac6==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, acquired_genes) %>%
  mutate(acquired_bin=if_else(acquired_genes==0, 0, 1))

# MIC vs presence/absence of qnr, in absence of QRDR and aac6
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   0.500   1.000   1.576   2.000   8.000
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3471  0.2500 64.0000
wilcox.test(log2(as.numeric(Measurement)) ~ acquired_bin, data=qnr_MIC_nullBG_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by acquired_bin
## W = 145391, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ acquired_bin, data=qnr_MIC_nullBG_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ acquired_bin, data = qnr_MIC_nullBG_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1874 -0.0585 -0.0585 -0.0585  7.9415 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -1.94146    0.01628  -119.3   <2e-16 ***
## acquired_bin  2.12886    0.03979    53.5   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.915 on 3792 degrees of freedom
## Multiple R-squared:  0.4301, Adjusted R-squared:   0.43 
## F-statistic:  2862 on 1 and 3792 DF,  p-value: < 2.2e-16
# DD data in absence of QRDR and aac6
qnr_DD_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, acquired_genes) %>%
  mutate(acquired_bin=if_else(acquired_genes==0, 0, 1))

# DD vs presence/absence of qnr, in absence of QRDR and aac6

summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_bin==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   17.00   21.00   19.51   22.00   31.00
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_bin==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
wilcox.test(as.numeric(Measurement) ~ acquired_bin, data=qnr_DD_noGenes_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by acquired_bin
## W = 230345, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ acquired_bin, data=qnr_DD_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ acquired_bin, data = qnr_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6659  -1.6659   0.3341   1.3341  15.3341 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  28.66587    0.05979  479.41   <2e-16 ***
## acquired_bin -9.15545    0.31160  -29.38   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.996 on 2605 degrees of freedom
## Multiple R-squared:  0.2489, Adjusted R-squared:  0.2486 
## F-statistic: 863.3 on 1 and 2605 DF,  p-value: < 2.2e-16
# all genomes with no acquired genes (MIC/DD)
qnr_noGenes_dat <- cipro_antibiogram %>%
    filter(aac6==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, acquired_genes) %>%
  mutate(acquired_bin=if_else(acquired_genes==0, 0, 1))

dim(qnr_noGenes_dat)
## [1] 6401    6
# NWT vs presence/absence of qnr, in absence of QRDR and aac6
fisher.test(table(qnr_noGenes_dat$nonWT_binary, qnr_noGenes_dat$acquired_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(qnr_noGenes_dat$nonWT_binary, qnr_noGenes_dat$acquired_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   83.79906 149.94896
## sample estimates:
## odds ratio 
##   111.3326
# resistance vs presence/absence of qnr, in absence of QRDR and aac6
fisher.test(table(qnr_noGenes_dat$resistant, qnr_noGenes_dat$acquired_bin))
## 
##  Fisher's Exact Test for Count Data
## 
## data:  table(qnr_noGenes_dat$resistant, qnr_noGenes_dat$acquired_bin)
## p-value < 2.2e-16
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##   66.43958 105.46862
## sample estimates:
## odds ratio 
##   83.52862
# MIC vs qnr count
summary(lm(log2(as.numeric(Measurement)) ~ acquired_genes, data=qnr_MIC_nullBG_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ acquired_genes, 
##     data = qnr_MIC_nullBG_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1242 -0.0653 -0.0653 -0.0653  7.9347 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    -1.93471    0.01626 -118.98   <2e-16 ***
## acquired_genes  2.02480    0.03799   53.29   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9165 on 3792 degrees of freedom
## Multiple R-squared:  0.4282, Adjusted R-squared:  0.4281 
## F-statistic:  2840 on 1 and 3792 DF,  p-value: < 2.2e-16
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_genes==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.125   0.500   1.000   1.547   2.000   8.000
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_genes==2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.500   1.000   2.000   2.475   4.000   4.000
summary(as.numeric(qnr_MIC_nullBG_dat$Measurement)[qnr_MIC_nullBG_dat$acquired_genes>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 
# DD vs qnr count
summary(lm(as.numeric(Measurement) ~ acquired_genes, data=qnr_DD_noGenes_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ acquired_genes, data = qnr_DD_noGenes_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6642  -1.6642   0.3358   1.3358  15.3358 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)    28.66417    0.05939  482.65   <2e-16 ***
## acquired_genes -8.65839    0.28782  -30.08   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.978 on 2605 degrees of freedom
## Multiple R-squared:  0.2578, Adjusted R-squared:  0.2575 
## F-statistic:   905 on 1 and 2605 DF,  p-value: < 2.2e-16
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_genes==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   18.00   21.00   19.91   22.50   31.00
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_genes==2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     6.0     6.0    10.0    12.2    17.0    22.0
summary(as.numeric(qnr_DD_noGenes_dat$Measurement)[qnr_DD_noGenes_dat$acquired_genes>2])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 

Figure 2 - number of qnr/qep vs MIC, facet aac6 yes/no (no QRDR)

# MIC distribution for # QRDR, in absence of genes
qnr_MIC_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 1), linetype = 2, alpha = 0.6, color = "black")  +
    geom_hline(aes(yintercept = 0.25), linetype = 1, alpha = 0.6, color = "black")  +
    facet_wrap(vars(aac6_label)) +
    scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:9), labels = function(x) round(as.numeric(x), digits = 3)) +
    scale_color_manual(values = res_colours) + 
    theme_light() + 
    labs(y="MIC (mg/L)", x="", col="Phenotype", 
         title="b) No. PMQR genes vs phenotype",
         subtitle="(in absence of QRDR mutations)")


qnr_pheno_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
       scale_fill_manual(values = res_colours) + 
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of PMQR genes") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())

Fig 2

QRDR_MIC_noGenes / QRDR_pheno_noGenes / qnr_MIC_noGenes / qnr_pheno_noGenes + plot_layout(heights=c(3,1, 3, 1))

ggsave("figs/Fig2_numQnr_numPMQR_MIC_pheno.pdf", width=6, height=10)
ggsave("figs/Fig2_numQnr_numPMQR_MIC_pheno.png", width=6, height=10)

Fig S8 - number of qnr vs DD, facet aac6 yes/no (no QRDR)

# DD distribution for # qnr, in absence of genes
qnr_DD_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>% 
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), y=as.numeric(Measurement))) + 
    geom_violin() + 
    geom_count(aes(colour = SRnwt)) + 
    geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
    geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
    facet_wrap(vars(aac6_label)) +
      scale_color_manual(values = res_colours) + 
    theme_light() + 
    labs(y="Disk zone (mm)", x="", col="Phenotype",
         title="No. PMQR genes vs phenotype",
         subtitle="(in absence of QRDR mutations)")


qnr_DDpheno_noGenes <- cipro_antibiogram %>%
    filter(QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
    mutate(aac6_label=if_else(aac6==0, "aac(6')-Ib-cr absent", "aac(6')-Ib-cr present")) %>%
    ggplot(aes(x=factor(acquired_genes), fill=SRnwt)) + 
    geom_bar(stat='count', position='fill') + 
    facet_wrap(vars(aac6_label)) +
      scale_fill_manual(values = res_colours) + 
    scale_y_continuous(labels=scales::percent_format()) +
    geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), size=2) +
    theme_light() + 
    labs(y="% Phenotype", x="Number of PMQR genes") + 
    theme(legend.position="none", strip.background = element_blank(), strip.text = element_blank())
qnr_DD_noGenes / qnr_DDpheno_noGenes + plot_layout(heights=c(2,1))

ggsave("figs/FigS8_numQnr_DD_pheno.pdf", width=6, height=4)
ggsave("figs/FigS8_numQnr_DD_pheno.png", width=6, height=4)

Fig3 panel b - aac6, QRDR mutations, qnr with DD data

upset_aac6_simple_DD <- cipro_antibiogram %>% 
  filter(Laboratory.Typing.Method == "Disk diffusion") %>%
  upset(
      intersect=c("aac6", "QRDR_mutations","acquired_genes"),  # counts are converted to binary
          labeller=ggplot2::as_labeller(c(
        'QRDR_mutations'='QRDR mutations',
        'acquired_genes'='PMQR genes', 
        'aac6'="aac(6')-Ib-cr")),      
      set_sizes=F,
  sort_intersections_by='degree',
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
             scale_fill_manual(values = res_colours),
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with disk diffusion data
        'Zone diameter (mm)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 21.0), linetype = 1, alpha = 0.6, color = "black") +
              geom_hline(aes(yintercept = 25.0), linetype = 2, alpha = 0.6, color = "black") +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
              scale_color_manual(values = res_colours)+
              guides(size = "none") +
              labs(linetype = "CLSI/EUCAST", colour = "Phenotype") +
              ggtitle("Disk diffusion data") +
              ylim(5,50)
        )
      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_aac6_simple_DD

Fig3 panel a - aac6, QRDR mutations, qnr with MIC data

upset_aac6_simple_MIC <- cipro_antibiogram %>% 
  filter(grepl('MIC.*$', Laboratory.Typing.Method)) %>%
  upset(
      intersect=c("aac6", "QRDR_mutations","acquired_genes"),  # counts are converted to binary
          labeller=ggplot2::as_labeller(c(
        'QRDR_mutations'='QRDR mutations',
        'acquired_genes'='PMQR genes', 
        'aac6'="aac(6')-Ib-cr")),      
      set_sizes=F,
      sort_intersections_by='degree',
      base_annotations=list( # add stacked barplot showing S/nonS
        'Percent of genomes'=list(
        aes=aes(x=intersection, fill=SRnwt),
          geom=list(
            geom_bar(stat='count', position='fill'), 
             scale_fill_manual(values = res_colours) ,
            scale_y_continuous(labels=scales::percent_format()),
            theme(legend.position = "none", axis.title=element_text(size=8)),
            geom_text(aes(label=..count..), stat="count", position=position_fill(vjust = .5), angle = 45, size=2))
        )
      ), 
      annotations= list( # add violin plot with MIC data
        'MIC measurement (mg/L)'=(
            ggplot(mapping=aes(y=as.numeric(Measurement))) +
              geom_hline(aes(yintercept = 1), linetype = 1, alpha = 0.6, color = "black")  +
              geom_hline(aes(yintercept = 0.25), linetype = 2, alpha = 0.6, color = "black")  +
              geom_violin()+ 
              geom_count(aes(colour = SRnwt))+
               scale_color_manual(values = res_colours)+
              scale_y_continuous(trans = log2_trans(), breaks = 2^(-5:6), labels = function(x) round(as.numeric(x), digits = 3), limits=c(2^-5,2^6)) +
              labs(colour = "Phenotype") +
              theme(legend.position="none") + 
              ggtitle("MIC data")
        )

      )
    ) + patchwork::plot_layout(heights=c(3,1,1)) # relative heights of plotting areas

upset_aac6_simple_MIC

Fig3: aac6, QRDR mutations, qnr with MIC & DD data

upset_aac6_simple_MIC | upset_aac6_simple_DD

ggsave("figs/Fig3_simple_comb_MIC_DD.pdf", width=12, height=6)
ggsave("figs/Fig3_simple_comb_MIC_DD.png", width=12, height=6)

numbers for text - paragraph 6, effect of aac6 gene, in absence of QRDR and other acquired

# MIC
aac_MIC_nullBG_dat <- cipro_antibiogram %>%
  filter(Laboratory.Typing.Method !="Disk diffusion") %>%
    filter(acquired_genes==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, aac6)

# MIC vs presence/absence of qnr
wilcox.test(log2(as.numeric(Measurement)) ~ aac6, data=aac_MIC_nullBG_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  log2(as.numeric(Measurement)) by aac6
## W = 177516, p-value = 1.902e-12
## alternative hypothesis: true location shift is not equal to 0
summary(lm(log2(as.numeric(Measurement)) ~ aac6, data=aac_MIC_nullBG_dat))
## 
## Call:
## lm(formula = log2(as.numeric(Measurement)) ~ aac6, data = aac_MIC_nullBG_dat)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.1174 -0.0585 -0.0585 -0.0585  7.9415 
## 
## Coefficients:
##             Estimate Std. Error  t value Pr(>|t|)    
## (Intercept) -1.94146    0.01541 -126.006  < 2e-16 ***
## aac6         0.58255    0.07283    7.999 1.73e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.866 on 3305 degrees of freedom
## Multiple R-squared:  0.01899,    Adjusted R-squared:  0.01869 
## F-statistic: 63.98 on 1 and 3305 DF,  p-value: 1.725e-15
summary(as.numeric(aac_MIC_nullBG_dat$Measurement)[aac_MIC_nullBG_dat$aac6==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.1200  0.2500  0.2500  0.6021  0.5000  4.0000
summary(as.numeric(aac_MIC_nullBG_dat$Measurement)[aac_MIC_nullBG_dat$aac6==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0300  0.2500  0.2500  0.3471  0.2500 64.0000
aac_MIC_nullBG_dat %>% group_by(aac6) %>% summarise(S=mean(Resistance.phenotype=="S"), I=mean(Resistance.phenotype=="I"), R=mean(Resistance.phenotype=="R"))
# DD
aac_DD_nullBG_dat <- cipro_antibiogram %>%
    filter(acquired_genes==0 & QRDR_mutations==0) %>% 
    filter(Laboratory.Typing.Method=="Disk diffusion") %>%
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, aac6)

wilcox.test(as.numeric(Measurement) ~ aac6, data=aac_DD_nullBG_dat)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  as.numeric(Measurement) by aac6
## W = 25156, p-value = 5.356e-05
## alternative hypothesis: true location shift is not equal to 0
summary(lm(as.numeric(Measurement) ~ aac6, data=aac_DD_nullBG_dat))
## 
## Call:
## lm(formula = as.numeric(Measurement) ~ aac6, data = aac_DD_nullBG_dat)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -22.6659  -1.6659   0.3341   1.3341  15.3341 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 28.66587    0.05766 497.182  < 2e-16 ***
## aac6        -5.08254    0.83602  -6.079 1.39e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.889 on 2521 degrees of freedom
## Multiple R-squared:  0.01445,    Adjusted R-squared:  0.01406 
## F-statistic: 36.96 on 1 and 2521 DF,  p-value: 1.389e-09
summary(as.numeric(aac_DD_nullBG_dat$Measurement)[aac_DD_nullBG_dat$aac6==1])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   16.00   21.00   23.50   23.58   27.25   29.00
summary(as.numeric(aac_DD_nullBG_dat$Measurement)[aac_DD_nullBG_dat$aac6==0])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    6.00   27.00   29.00   28.67   30.00   44.00
aac_DD_nullBG_dat %>% group_by(aac6) %>% summarise(S=mean(Resistance.phenotype=="S"), I=mean(Resistance.phenotype=="I"), R=mean(Resistance.phenotype=="R"))
# all genomes with no acquired genes (MIC/DD)
qnr_noGenes_dat <- cipro_antibiogram %>%
    filter(acquired_genes==0 & QRDR_mutations==0) %>% 
  select(Measurement, Resistance.phenotype, resistant, nonWT_binary, aac6)

dim(qnr_noGenes_dat)
## [1] 5830    5